---
title: mahoudata
keywords: fastai
sidebar: home_sidebar
summary: "API details."
---

class PreProcess[source]

PreProcess(ctx)

Preprocessing class that groups all data preparation functions.

PreProcess.clean_duplicates[source]

PreProcess.clean_duplicates()

Clean duplicates method

PreProcess.cols_munging[source]

PreProcess.cols_munging(dataframe, fillna=True)

Columns preparation method

PreProcess.fill_na[source]

PreProcess.fill_na(dataframe, method='median')

Replaces NaN values using the given `method` (default: `'median'`).

PreProcess.scale_cols[source]

PreProcess.scale_cols(dataframe)

Min-max scaler for numeric columns.

class RecommenderStrategyFactory[source]

RecommenderStrategyFactory(ctx)

Strategy factory

RecommenderStrategyFactory.createStrategy[source]

RecommenderStrategyFactory.createStrategy(strategy)

class NumericStrategy[source]

NumericStrategy(ctx)

Numeric based recommender system

NumericStrategy.model_builder[source]

NumericStrategy.model_builder(dataframe)

NumericStrategy.exec_strategy[source]

NumericStrategy.exec_strategy(dataframe, distance='cosine')

Explore Data

# Load the datathon dataset from the local data folder.
df = pd.read_csv("./data/dataset-datathon.csv")

# Context dictionary (passed to RecommenderStrategyFactory further down);
# its single entry lists the column names under 'numeric_cols'.
context = {
    'numeric_cols': [
        'lupulo_afrutado_citrico',
        'lupulo_floral_herbal',
        'amargor',
        'color',
        'maltoso',
        'licoroso',
        'afrutado',
        'especias',
        'acidez',
    ],
}

# Build a profiling report for the raw dataframe and show it inline.
profile = ProfileReport(
    df,
    title='Pandas Profiling Report',
    html={'style': {'full_width': True}},
)
profile.to_notebook_iframe()

Remove duplicates

According to the profile report, about 60% of the rows are duplicates. Remove them, ignoring the `vajilla` column when comparing rows.

# Rows count as duplicates when they match on every column except 'vajilla'.
dedup_subset = df.columns.difference(['vajilla'])
df_clean = df.drop_duplicates(subset=dedup_subset)

# Profile the de-duplicated dataframe and show the report inline.
profile = ProfileReport(
    df_clean,
    title='Pandas Profiling Report',
    html={'style': {'full_width': True}},
)
profile.to_notebook_iframe()

Run Recommender

# Create the strategy factory from the context dictionary defined above.
f = RecommenderStrategyFactory(context)

# Ask the factory for the 'numeric' strategy
# (presumably a NumericStrategy instance -- confirm against createStrategy).
strategy = f.createStrategy('numeric')

# Build the strategy's data model from the de-duplicated dataframe.
datamodel = strategy.model_builder(df_clean)

# Execute the strategy on the data model; no distance is passed, so the
# default applies (documented above as distance='cosine' for NumericStrategy).
recommender_df = strategy.exec_strategy(datamodel)

# Display the resulting dataframe.
recommender_df
0 1 2 3 4 5 6 7 8 9 ... 472 473 474 475 476 477 478 479 480 481
0 0.000000 0.047415 0.003247 0.018953 0.003421 0.091687 0.074829 0.022629 0.022629 0.111810 ... 0.105950 0.030003 0.044675 0.145143 0.256348 0.039177 0.026858 0.050713 0.024794 0.049222
1 0.047415 0.000000 0.028834 0.012647 0.050154 0.063655 0.053232 0.010847 0.010847 0.095622 ... 0.086773 0.020033 0.032089 0.082647 0.239943 0.027402 0.062483 0.117934 0.015956 0.014698
2 0.003247 0.028834 0.000000 0.011132 0.007807 0.082056 0.057230 0.011863 0.011863 0.104905 ... 0.093416 0.016190 0.033315 0.123624 0.250812 0.025874 0.028131 0.058189 0.012319 0.028870
3 0.018953 0.012647 0.011132 0.000000 0.015547 0.063519 0.058663 0.010086 0.010086 0.080221 ... 0.077451 0.026505 0.017011 0.079057 0.225650 0.022433 0.040835 0.067101 0.020558 0.024629
4 0.003421 0.050154 0.007807 0.015547 0.000000 0.093026 0.079839 0.028052 0.028052 0.104384 ... 0.097089 0.041824 0.036260 0.128222 0.249017 0.040898 0.033240 0.039917 0.036607 0.052592
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
477 0.039177 0.027402 0.025874 0.022433 0.040898 0.040726 0.029397 0.010698 0.010698 0.044549 ... 0.032916 0.021580 0.016716 0.050831 0.149350 0.000000 0.026939 0.048856 0.016969 0.029919
478 0.026858 0.062483 0.028131 0.040835 0.033240 0.049601 0.086540 0.025646 0.025646 0.051417 ... 0.070806 0.045837 0.062186 0.114410 0.136284 0.026939 0.000000 0.045937 0.030399 0.069017
479 0.050713 0.117934 0.058189 0.067101 0.039917 0.102068 0.084820 0.072307 0.072307 0.069321 ... 0.050673 0.091401 0.048418 0.099959 0.157469 0.048856 0.045937 0.000000 0.088857 0.110005
480 0.024794 0.015956 0.012319 0.020558 0.036607 0.060153 0.040360 0.004062 0.004062 0.092834 ... 0.082440 0.003796 0.040091 0.112092 0.224286 0.016969 0.030399 0.088857 0.000000 0.021496
481 0.049222 0.014698 0.028870 0.024629 0.052592 0.107969 0.059386 0.024082 0.024082 0.125342 ... 0.090604 0.021840 0.042406 0.096877 0.265432 0.029919 0.069017 0.110005 0.021496 0.000000

482 rows × 482 columns

# Take column 1 of the recommender matrix, sort its values in ascending
# order, and keep the result as a one-column dataframe for display.
column_one_sorted = recommender_df[1].sort_values(ascending=True)
recommendations_example = column_one_sorted.to_frame()
recommendations_example
1
1 0.000000e+00
454 1.110223e-16
8 1.084706e-02
7 1.084706e-02
461 1.154931e-02
... ...
430 4.736606e-01
256 4.780530e-01
178 4.815789e-01
205 4.880580e-01
187 5.017451e-01

482 rows × 1 columns